1 Introduction

The purpose of this notebook is to define a dataframe with two column (cell barcodes and annotation) of all cells and clusters that we will include in the final dataset (cluster freeze). The cells to include come from three different sources:

  1. Cells profiled with scRNA-seq or multiome cells from the objects in advanced levels of the analysis for overrepresented cell types (T, B).
  2. Cells profiled with scRNA-seq from the objects in advanced levels of the analysis for underrepresented cell types (non-lymphoid). Remember that for these cells, the integration between multiome and RNA was masking biological heterogeneity, so we decided to just focus on scRNA-seq for the annotation and transfer the labels later on.
  3. Cells profiled with multiome from the objects in initial levels of the analysis for underrepresented cell types (non-lymphoid).

We will fetch the UMAP coords and annotation for (1) and (2) from the Seurat objects we saved in the previous notebook (00-harmonize_seurat_objects.Rmd). For the third case, we will upload intermediate Seurat objects and early levels of the analysis.

1.1 Load packages

library(Seurat)
library(harmony)
library(tidyverse)
library(here)

1.2 Define parameters

All the parameters (paths, colors, variables) are encoded in the file “utils_final_clusters.R”:

source(here("scRNA-seq/bin/utils_final_clusters.R"))

2 Non-lymphoid

2.1 Myeloid

# Read
myeloid <- readRDS(path_to_save_myeloid)
myeloid_multi <- readRDS(path_to_multiome_myeloid)
DimPlot(myeloid, group.by = "annotation_20220215", cols = color_palette)

DimPlot(myeloid, group.by = "annotation_figure_1", cols = color_palette)

table(myeloid_multi$assay)
## 
##       3P       5P multiome 
##     3017      189      407
DimPlot(myeloid_multi, cols = color_palette, split.by = "assay")

# Set up dataframe to plot figure 1
myeloid_multi <- subset(myeloid_multi, assay == "multiome")
myeloid_multi$barcode <- colnames(myeloid_multi)
myeloid_multi$annotation_figure_1 <- NA
myeloid_multi$annotation_20220215 <- "myeloid_multiome"
myeloid_multi$UMAP_1_20220215 <- NA
myeloid_multi$UMAP_2_20220215 <- NA
fig1_df <- myeloid@meta.data
condition1 <- all(selected_cols %in% colnames(myeloid_multi@meta.data))
condition2 <- all(myeloid_multi$assay == "multiome")
if (condition1 & condition2) {
  fig1_df <- bind_rows(fig1_df, myeloid_multi@meta.data[, selected_cols])
} else {
  stop("Conditions not met!")
}


# Remove
rm(myeloid, myeloid_multi)

2.2 Epithelial

# Read
epithelial <- readRDS(path_to_save_epithelial)
epithelial_multi <- readRDS(path_to_multiome_epithelial)
DimPlot(epithelial, group.by = "annotation_20220215", cols = color_palette)

DimPlot(epithelial, group.by = "annotation_figure_1", cols = color_palette)

table(epithelial_multi$assay)
## 
##       3P       5P multiome 
##      290        1       84
DimPlot(epithelial_multi, cols = color_palette, split.by = "assay")

# Set up dataframe to plot figure 1
epithelial_multi <- subset(epithelial_multi, assay == "multiome")
epithelial_multi$barcode <- colnames(epithelial_multi)
epithelial_multi$annotation_figure_1 <- "epithelial"
epithelial_multi$annotation_20220215 <- "epithelial_multiome"
epithelial_multi$UMAP_1_20220215 <- NA
epithelial_multi$UMAP_2_20220215 <- NA
condition1 <- all(selected_cols %in% colnames(epithelial_multi@meta.data))
condition2 <- all(epithelial_multi$assay == "multiome")
if (condition1 & condition2) {
  fig1_df <- bind_rows(
    fig1_df,
    epithelial@meta.data,
    epithelial_multi@meta.data[, selected_cols]
  )
} else {
  stop("Conditions not met!")
}


# Remove
rm(epithelial, epithelial_multi)

2.3 PDC

# Read
pdc <- readRDS(path_to_save_pdc)
pdc_multi <- readRDS(path_to_multiome_pdc)
DimPlot(pdc, group.by = "annotation_20220215", cols = color_palette)

DimPlot(pdc, group.by = "annotation_figure_1", cols = color_palette)

DimPlot(pdc_multi, cols = color_palette, split.by = "assay")

table(pdc$hospital)
## 
##   CIMA Clinic 
##    243     79
table(pdc_multi$hospital)
## 
##         CIMA       Clinic Royal London 
##          377          164           88
pdc <- subset(pdc, hospital != "Royal London")
pdc_multi <- subset(pdc_multi, hospital != "Royal London")


# Set up dataframe to plot figure 1
pdc_multi <- subset(pdc_multi, assay == "multiome")
pdc_multi$barcode <- colnames(pdc_multi)
pdc_multi$annotation_figure_1 <- "PDC"
pdc_multi$annotation_20220215 <- "PDC_multiome"
pdc_multi$UMAP_1_20220215 <- NA
pdc_multi$UMAP_2_20220215 <- NA
condition1 <- all(selected_cols %in% colnames(pdc_multi@meta.data))
condition2 <- all(pdc_multi$assay == "multiome")
if (condition1 & condition2) {
  fig1_df <- bind_rows(
    fig1_df,
    pdc@meta.data,
    pdc_multi@meta.data[, selected_cols]
  )
} else {
  stop("Conditions not met!")
}


# Remove
rm(pdc, pdc_multi)

2.4 FDC

# Read
fdc <- readRDS(path_to_save_fdc)
fdc_multi <- readRDS(path_to_multiome_fdc)
fdc_proliferative <- readRDS(path_to_fdc_proliferative)
DimPlot(fdc, group.by = "annotation_20220215", cols = color_palette)

DimPlot(fdc, group.by = "annotation_figure_1", cols = color_palette)

table(fdc_multi$assay)
## 
##       3P       5P multiome 
##     1508       26       54
DimPlot(fdc_multi, cols = color_palette, split.by = "assay")

table(fdc_proliferative$assay)
## 
##  3P 
## 223
table(fdc$hospital)
## 
##   CIMA Clinic 
##    908     18
fdc <- subset(fdc, hospital != "Royal London")


# Set up dataframe to plot figure 1
fdc_multi <- subset(fdc_multi, assay == "multiome")
fdc_multi$barcode <- colnames(fdc_multi)
fdc_multi$annotation_figure_1 <- "FDC"
fdc_multi$annotation_20220215 <- "FDC_multiome"
fdc_proliferative$annotation_figure_1 <- "cycling FDC"
fdc_proliferative$annotation_20220215 <- "cycling FDC"
fdc_multi$UMAP_1_20220215 <- NA
fdc_multi$UMAP_2_20220215 <- NA
fdc_proliferative$UMAP_1_20220215 <- NA
fdc_proliferative$UMAP_2_20220215 <- NA
condition1 <- all(selected_cols %in% colnames(fdc_multi@meta.data))
condition2 <- all(fdc_multi$assay == "multiome")
condition3 <- all(selected_cols %in% colnames(fdc_proliferative@meta.data))
if (condition1 & condition2 & condition3) {
  fig1_df <- bind_rows(
    fig1_df,
    fdc@meta.data,
    fdc_multi@meta.data[, selected_cols],
    fdc_proliferative@meta.data[, selected_cols],
  )
} else {
  stop("Conditions not met!")
}


# Remove
rm(fdc, fdc_multi, fdc_proliferative)

3 Precursor lymphocytes

3.1 preB

Note that, because we have very few preB cells, we couldn’t stritify them into meaningful clusters. Thus, all of them are labeled as “preB”:

# Read
preB <- readRDS(path_to_save_preB)
DimPlot(preB, group.by = "annotation_20220215", cols = color_palette)

DimPlot(preB, group.by = "annotation_figure_1", cols = color_palette)

table(preB$assay)
## 
##       3P multiome 
##       68        2
table(preB$hospital)
## 
##   CIMA Clinic 
##     69      1
DimPlot(preB, cols = color_palette, split.by = "hospital")

preB <- subset(preB, hospital != "Royal London")


# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(preB@meta.data))
if (condition1) {
  fig1_df <- bind_rows(fig1_df, preB@meta.data[, selected_cols])
} else {
  stop("Conditions not met!")
}


# Remove
rm(preB)

3.2 preT

# Read
preT <- readRDS(path_to_save_preT)
DimPlot(preT, group.by = "annotation_20220215", cols = color_palette)

DimPlot(preT, group.by = "annotation_figure_1", cols = color_palette)

table(preT$assay)
## 
## 3P 
## 12
table(preT$hospital)
## 
## CIMA 
##   12
DimPlot(preT, cols = color_palette, split.by = "hospital")

preT <- subset(preT, hospital != "Royal London")


# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(preT@meta.data))
if (condition1) {
  fig1_df <- bind_rows(fig1_df, preT@meta.data[, selected_cols])
} else {
  stop("Conditions not met!")
}


# Remove
rm(preT)

4 NK/ILC

All the cell types hereafter included multiome cells, so we only load the last object with all cells.

# Read
ilc_nk <- readRDS(path_to_save_ilc_nk)
DimPlot(ilc_nk, group.by = "annotation_20220215", cols = color_palette)

DimPlot(ilc_nk, group.by = "annotation_figure_1", cols = color_palette)

table(ilc_nk$assay)
## 
##       3P multiome 
##      473      192
table(ilc_nk$hospital)
## 
##   CIMA Clinic 
##    586     79
DimPlot(ilc_nk, cols = color_palette, split.by = "hospital")

# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(ilc_nk@meta.data))
if (condition1) {
  fig1_df <- bind_rows(fig1_df, ilc_nk@meta.data[, selected_cols])
} else {
  stop("Conditions not met!")
}


# Remove
rm(ilc_nk)

5 CD8 T cells

# Read
cd8 <- readRDS(path_to_save_cd8)
DimPlot(cd8, group.by = "annotation_20220215", cols = color_palette)

DimPlot(cd8, group.by = "annotation_figure_1", cols = color_palette)

table(cd8$assay)
## 
##       3P multiome 
##     8203     1757
table(cd8$hospital)
## 
##   CIMA Clinic 
##   8757   1203
DimPlot(cd8, cols = color_palette, split.by = "hospital")

# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(cd8@meta.data))
if (condition1) {
  fig1_df <- bind_rows(fig1_df, cd8@meta.data[, selected_cols])
} else {
  stop("Conditions not met!")
}


# Remove
rm(cd8)

6 CD4 T cells

# Read
cd4 <- readRDS(path_to_save_cd4)
cd4_proli <- readRDS(path_to_proliferative_cd4_t)
DimPlot(cd4, group.by = "annotation_20220215", cols = color_palette)

DimPlot(cd4, group.by = "annotation_figure_1", cols = color_palette)

table(cd4$assay)
## 
##       3P multiome 
##    39834    12473
table(cd4$hospital)
## 
##   CIMA Clinic 
##  41222  11085
DimPlot(cd4, cols = color_palette, split.by = "hospital")

table(cd4_proli$assay)
## 
##       3P multiome 
##     1677      183
table(cd4_proli$hospital)
## 
##   CIMA Clinic 
##   1709    151
DimPlot(cd4_proli, cols = color_palette, split.by = "hospital")

# Set up dataframe to plot figure 1
cd4_proli$annotation_figure_1 <- "cycling T"
cd4_proli$annotation_20220215 <- "cycling T"
cd4_proli$UMAP_1_20220215 <- NA
cd4_proli$UMAP_2_20220215 <- NA
condition1 <- all(selected_cols %in% colnames(cd4@meta.data))
condition2 <- all(selected_cols %in% colnames(cd4_proli@meta.data))
if (condition1 & condition2) {
  fig1_df <- bind_rows(
    fig1_df,
    cd4@meta.data,
    cd4_proli@meta.data[, selected_cols]
  )
} else {
  stop("Conditions not met!")
}


# Remove
rm(cd4, cd4_proli)

7 Plasma cells (PC)

The challenge with all the Seurat objects coming from the B cell lineage is that it also includes cells that belong to other cell types/states. For instance in the PC object, we have cells from GCBC and NBC/MBC. Hence, we will subset them to avoid duplication:

# Read
pc <- readRDS(path_to_save_pc)
DimPlot(pc, group.by = "annotation_20220215", cols = color_palette)

DimPlot(pc, group.by = "annotation_figure_1", cols = color_palette)

pc <- subset(pc, annotation_figure_1 == "PC")
table(pc$assay)
## 
##       3P multiome 
##     7252     1836
table(pc$hospital)
## 
##   CIMA Clinic 
##   7598   1490
DimPlot(pc, cols = color_palette, split.by = "hospital")

# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(pc@meta.data))
if (condition1) {
  fig1_df <- bind_rows(fig1_df, pc@meta.data)
} else {
  stop("Conditions not met!")
}


# Remove
rm(pc)

8 NBC/MBC

# Read
nbc_mbc <- readRDS(path_to_save_nbc_mbc)
DimPlot(nbc_mbc, group.by = "annotation_20220215", cols = color_palette)

DimPlot(nbc_mbc, group.by = "annotation_figure_1", cols = color_palette)

nbc_mbc <- subset(nbc_mbc, annotation_level_1 == "NBC_MBC")
table(nbc_mbc$assay)
## 
##       3P multiome 
##    86563    25915
table(nbc_mbc$hospital)
## 
##   CIMA Clinic 
##  90908  21570
DimPlot(nbc_mbc, cols = color_palette, split.by = "hospital")

# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(nbc_mbc@meta.data))
if (condition1) {
  fig1_df <- bind_rows(fig1_df, nbc_mbc@meta.data)
} else {
  stop("Conditions not met!")
}


# Remove
rm(nbc_mbc)

9 GCBC

# Read
gcbc <- readRDS(path_to_save_gcbc)
DimPlot(gcbc, group.by = "annotation_20220215", cols = color_palette)

DimPlot(gcbc, group.by = "annotation_figure_1", cols = color_palette)

table(gcbc$assay)
## 
##       3P multiome 
##    61724    10450
table(gcbc$hospital)
## 
##   CIMA Clinic 
##  69661   2513
DimPlot(gcbc, cols = color_palette, split.by = "hospital")

# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(gcbc@meta.data))
if (condition1) {
  fig1_df <- bind_rows(fig1_df, gcbc@meta.data)
} else {
  stop("Conditions not met!")
}


# Remove
rm(gcbc)

10 Sanity checks

# Duplicated cells?
any(duplicated(fig1_df$barcode))
## [1] FALSE
# Lingering cells from King et al.?
any(fig1_df$hospital == "Royal London")
## [1] FALSE

11 Save

saveRDS(fig1_df, path_to_save_umap_df_rds)
write_delim(
  fig1_df,
  file = path_to_save_umap_df_csv,
  delim = ";",
  col_names = TRUE
)

12 Session information

sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server release 6.7 (Santiago)
## 
## Matrix products: default
## BLAS:   /apps/R/4.0.1/lib64/R/lib/libRblas.so
## LAPACK: /apps/R/4.0.1/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] here_1.0.1         forcats_0.5.1      stringr_1.4.0      dplyr_1.0.4        purrr_0.3.4        readr_1.4.0        tidyr_1.1.2        tibble_3.0.6       ggplot2_3.3.3      tidyverse_1.3.0    harmony_0.1.0      Rcpp_1.0.6         SeuratObject_4.0.0 Seurat_4.0.0       BiocStyle_2.16.1  
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1         backports_1.2.1      plyr_1.8.6           igraph_1.2.6         lazyeval_0.2.2       splines_4.0.1        listenv_0.8.0        scattermore_0.7      digest_0.6.27        htmltools_0.5.1.1    fansi_0.4.2          magrittr_2.0.1       tensor_1.5           cluster_2.1.0        ROCR_1.0-11          globals_0.14.0       modelr_0.1.8         matrixStats_0.58.0   colorspace_2.0-0     rvest_0.3.6          ggrepel_0.9.1        haven_2.3.1          xfun_0.21            crayon_1.4.1         jsonlite_1.7.2       spatstat_1.64-1      spatstat.data_2.0-0  survival_3.2-3       zoo_1.8-8            glue_1.4.2           polyclip_1.10-0      gtable_0.3.0         leiden_0.3.7         future.apply_1.7.0   abind_1.4-5          scales_1.1.1         DBI_1.1.1            miniUI_0.1.1.1       viridisLite_0.3.0    xtable_1.8-4         reticulate_1.18      htmlwidgets_1.5.3    httr_1.4.2           RColorBrewer_1.1-2   ellipsis_0.3.1       ica_1.0-2            farver_2.0.3         pkgconfig_2.0.3      sass_0.3.1           uwot_0.1.10          dbplyr_2.1.0         deldir_0.2-10        utf8_1.1.4           tidyselect_1.1.0     labeling_0.4.2       rlang_0.4.10        
##  [57] reshape2_1.4.4       later_1.1.0.1        munsell_0.5.0        cellranger_1.1.0     tools_4.0.1          cli_2.3.1            generics_0.1.0       broom_0.7.4          ggridges_0.5.3       evaluate_0.14        fastmap_1.1.0        yaml_2.2.1           goftest_1.2-2        knitr_1.31           fs_1.5.0             fitdistrplus_1.1-3   RANN_2.6.1           pbapply_1.4-3        future_1.21.0        nlme_3.1-148         mime_0.10            xml2_1.3.2           compiler_4.0.1       rstudioapi_0.13      plotly_4.9.3         png_0.1-7            spatstat.utils_2.0-0 reprex_1.0.0         bslib_0.2.4          stringi_1.5.3        highr_0.8            ps_1.5.0             lattice_0.20-41      Matrix_1.2-18        vctrs_0.3.6          pillar_1.5.0         lifecycle_1.0.0      BiocManager_1.30.10  lmtest_0.9-38        jquerylib_0.1.3      RcppAnnoy_0.0.18     data.table_1.14.0    cowplot_1.1.1        irlba_2.3.3          httpuv_1.5.5         patchwork_1.1.1      R6_2.5.0             bookdown_0.21        promises_1.2.0.1     KernSmooth_2.23-17   gridExtra_2.3        parallelly_1.23.0    codetools_0.2-16     MASS_7.3-51.6        assertthat_0.2.1     rprojroot_2.0.2     
## [113] withr_2.4.1          sctransform_0.3.2    mgcv_1.8-31          parallel_4.0.1       hms_1.0.0            grid_4.0.1           rpart_4.1-15         rmarkdown_2.7        Rtsne_0.15           shiny_1.6.0          lubridate_1.7.9.2